Packages and Inputs

library(xgboost) # for xgboost
library(tidyverse) # general utility functions
# Load the outbreak records from the local CSV export.
# NOTE(review): this is an absolute path on one Windows machine; it has to be
# adjusted before the notebook can run anywhere else.
diseaseInfo <- readr::read_csv(
  file = "C:/Users/User/Desktop/university/machine learning/machine learning - salini/dataset/Outbreak_240817.csv"
)
Parsed with column specification:
cols(
  .default = col_character(),
  Id = col_double(),
  latitude = col_double(),
  longitude = col_double(),
  sumAtRisk = col_double(),
  sumCases = col_double(),
  sumDeaths = col_double(),
  sumDestroyed = col_double(),
  sumSlaughtered = col_double(),
  humansAge = col_double(),
  humansAffected = col_double(),
  humansDeaths = col_double()
)
See spec(...) for full column specifications.

Preparing our data & selecting features

The core xgboost function requires data to be a matrix. A matrix is like a dataframe that only has numbers in it. A sparse matrix is a matrix that has a lot of zeros in it. XGBoost has a built-in datatype, DMatrix, that is particularly good at storing and accessing sparse matrices efficiently.

head(diseaseInfo)

Our data will need some cleaning before it’s ready to be put in a matrix. To prepare our data, we have a number of steps we need to complete:

Remove information about the target variable from the training data

# Drop every column whose name starts with "human" so that the target
# (humansAffected) and the other human-related columns are not used as features.
diseaseInfo_humansRemoved <- select(diseaseInfo, -starts_with("human"))

Let’s create a new vector with the labels

# Build the target: TRUE where humansAffected was recorded, FALSE where it is NA.
# is.na() on the one-column selection yields a one-column logical matrix,
# which is then negated element-wise.
diseaseLabels <- !is.na(select(diseaseInfo, humansAffected))

# preview the first few rows of the target variable
head(diseaseLabels)
     humansAffected
[1,]          FALSE
[2,]          FALSE
[3,]          FALSE
[4,]          FALSE
[5,]          FALSE
[6,]          FALSE
head(diseaseInfo$humansAffected) # of the original column
[1] NA NA NA NA NA NA

Reduce the amount of redundant information

# Keep only the numeric feature columns:
#   - Id is a case identifier and should carry no signal
#   - longitude/latitude are dropped because location is also in the country column
diseaseInfo_numeric <- diseaseInfo_humansRemoved %>%
    select(-Id, -longitude, -latitude) %>%
    select_if(is.numeric)

# confirm that every remaining column is numeric
str(diseaseInfo_numeric)
tibble [17,008 x 5] (S3: tbl_df/tbl/data.frame)
 $ sumAtRisk     : num [1:17008] 248000 122 1283 NA NA ...
 $ sumCases      : num [1:17008] 12 6 112 1 1 1 19 2 1600 5 ...
 $ sumDeaths     : num [1:17008] 12 1 0 1 1 1 19 2 0 5 ...
 $ sumDestroyed  : num [1:17008] 50000 0 NA 0 NA NA 0 0 4000 0 ...
 $ sumSlaughtered: num [1:17008] 0 0 7 0 NA NA 0 0 0 0 ...

Convert categorical information (like country) to a numeric format

head(diseaseInfo$country)
[1] "South Africa"       "Russian Federation" "Zimbabwe"           "South Africa"      
[5] "Czech Republic"     "Czech Republic"    
model.matrix(~country-1,head(diseaseInfo)) # one-hot matrix for just the first few rows of the "country" column
  countryCzech Republic countryRussian Federation countrySouth Africa countryZimbabwe
1                     0                         0                   1               0
2                     0                         1                   0               0
3                     0                         0                   0               1
4                     0                         0                   1               0
5                     1                         0                   0               0
6                     1                         0                   0               0
attr(,"assign")
[1] 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$country
[1] "contr.treatment"
# One-hot encode the country column for the full dataset; the "- 1" removes
# the intercept so that each country gets its own indicator column.
region <- model.matrix(~ country - 1, data = diseaseInfo)

# look at the first few species descriptions
head(diseaseInfo[["speciesDescription"]])
[1] "domestic, unspecified bird" "domestic, swine"            "domestic, cattle"          
[4] "wild, unspecified bird"     "wild, wild boar"            "wild, wild boar"           

# Flag rows whose species description contains the word "domestic".
diseaseInfo_numeric$is_domestic <- str_detect(diseaseInfo$speciesDescription, "domestic")

# Use the last word of each species description as the species name and
# one-hot encode it.
#
# str_replace_all() strips EVERY punctuation character. str_replace() only
# removed the first one (normally the comma), so a description ending in ")"
# still ended in punctuation and "[a-z]*$" matched an empty string.
speciesList <- diseaseInfo$speciesDescription %>%
    str_replace_all("[[:punct:]]", "") %>% # remove all punctuation (some rows have parentheses)
    str_extract("[a-z]*$") # extract the last word in each row

# convert our vector into a one-column dataframe...
speciesList <- tibble(species = speciesList)

# ...and convert it to a matrix using one-hot encoding.
# NOTE: this changes the session-wide na.action option so that model.matrix()
# keeps rows with NA instead of dropping them; it stays in effect afterwards.
options(na.action = "na.pass")
species <- model.matrix(~ species - 1, speciesList)

# add the one-hot encoded variables and convert the dataframe into a matrix
diseaseInfo_numeric <- cbind(diseaseInfo_numeric, region, species)
diseaseInfo_matrix <- data.matrix(diseaseInfo_numeric)

Split the dataset to model

# 70/30 training/test split on SHUFFLED row indices.
# Taking the first 70% of rows in file order makes the split depend on how the
# CSV happens to be sorted, so train and test can come from different
# distributions. The seed makes the shuffle reproducible.
set.seed(1234)
shuffledRows <- sample(nrow(diseaseInfo_matrix))

# number of rows that go into the training set
numberOfTrainingSamples <- round(length(diseaseLabels) * .7)
trainRows <- shuffledRows[seq_len(numberOfTrainingSamples)]
testRows <- shuffledRows[-seq_len(numberOfTrainingSamples)]

# training data (drop = FALSE keeps a matrix even for a single row)
train_data <- diseaseInfo_matrix[trainRows, , drop = FALSE]
train_labels <- diseaseLabels[trainRows]

# testing data
test_data <- diseaseInfo_matrix[testRows, , drop = FALSE]
test_labels <- diseaseLabels[testRows]

# put our testing & training data into two separate DMatrix objects
dtrain <- xgb.DMatrix(data = train_data, label = train_labels)
dtest <- xgb.DMatrix(data = test_data, label = test_labels)

Analysis

Supervised Learning

set.seed(1234)
# NOTE(review): dtrain and dtest are built before this point, so reshuffling
# diseaseInfo here does not change which rows the model is trained or tested on.
diseaseInfo <- diseaseInfo[sample(seq_len(nrow(diseaseInfo))), ]

# Baseline model. The argument is spelled out as `nrounds` instead of relying
# on partial matching of `nround`.
model <- xgboost(data = dtrain,
                 nrounds = 2, # max number of boosting iterations
                 objective = "binary:logistic") # objective function
[1] train-error:0.019654 
[2] train-error:0.019654 
# score the held-out test set with the baseline model
pred <- predict(object = model, newdata = dtest)

# classification error: share of test rows where the prediction,
# thresholded at 0.5, disagrees with the label
err <- mean(test_labels != as.numeric(pred > 0.5))
print(paste("test-error=", err))
[1] "test-error= 0.000980007840062721"

Tuning our Model

# Train an xgboost model with shallower trees.
# `nrounds` and `max_depth` are the full/canonical argument names; the
# original `nround` only worked through partial argument matching.
model_tuned <- xgboost(data = dtrain,
                 max_depth = 3, # maximum depth of each decision tree
                 nrounds = 2, # max number of boosting iterations
                 objective = "binary:logistic") # objective function
[1] train-error:0.019654 
[2] train-error:0.019654 
# score the held-out test set with the depth-limited model
pred <- predict(object = model_tuned, newdata = dtest)

# classification error: share of test rows where the prediction,
# thresholded at 0.5, disagrees with the label
err <- mean(test_labels != as.numeric(pred > 0.5))
print(paste("test-error=", err))
[1] "test-error= 0.000980007840062721"

There are two things we can try to see if we can improve our model's performance: - Account for the fact that we have imbalanced classes - Train for more rounds

# count the negative & positive cases in the training labels
# (the name `postive_cases` is kept as-is because other chunks refer to it)
negative_cases <- sum(!train_labels)
postive_cases <- sum(train_labels)

# Train with class re-weighting and more rounds.
# `nrounds` and `max_depth` are the full/canonical argument names; the
# original `nround` only worked through partial argument matching.
# No separate validation set is supplied, so early stopping here watches the
# training error.
model_tuned <- xgboost(data = dtrain,
                 max_depth = 3, # maximum depth of each decision tree
                 nrounds = 10, # number of boosting rounds
                 early_stopping_rounds = 3, # if we don't see an improvement in this many rounds, stop
                 objective = "binary:logistic", # objective function
                 scale_pos_weight = negative_cases / postive_cases) # control for imbalanced classes
[1] train-error:0.020410 
Will train until train_error hasn't improved in 3 rounds.

[2] train-error:0.020494 
[3] train-error:0.020494 
[4] train-error:0.020914 
Stopping. Best iteration:
[1] train-error:0.020410
# score the held-out test set with the class-weighted model
pred <- predict(object = model_tuned, newdata = dtest)

# classification error: share of test rows where the prediction,
# thresholded at 0.5, disagrees with the label
err <- mean(test_labels != as.numeric(pred > 0.5))
print(paste("test-error=", err))
[1] "test-error= 0.00392003136025088"

… TODO

# Train with class re-weighting, more rounds and a gamma regularization term.
# `nrounds` and `max_depth` are the full/canonical argument names; the
# original `nround` only worked through partial argument matching.
# No separate validation set is supplied, so early stopping here watches the
# training error.
model_tuned <- xgboost(data = dtrain,
                 max_depth = 3, # maximum depth of each decision tree
                 nrounds = 10, # number of boosting rounds
                 early_stopping_rounds = 3, # if we don't see an improvement in this many rounds, stop
                 objective = "binary:logistic", # objective function
                 scale_pos_weight = negative_cases / postive_cases, # control for imbalanced classes
                 gamma = 1) # add a regularization term
[1] train-error:0.020410 
Will train until train_error hasn't improved in 3 rounds.

[2] train-error:0.020494 
[3] train-error:0.020494 
[4] train-error:0.020914 
Stopping. Best iteration:
[1] train-error:0.020410
# score the held-out test set with the regularized model
pred <- predict(object = model_tuned, newdata = dtest)

# classification error: share of test rows where the prediction,
# thresholded at 0.5, disagrees with the label
err <- mean(test_labels != as.numeric(pred > 0.5))
print(paste("test-error=", err))
[1] "test-error= 0.00392003136025088"

Interpretation

# Plot the trees of the baseline model: what's contributing most to it?
# diseaseInfo_matrix is a matrix, so its feature names come from colnames();
# names() on a matrix returns NULL.
xgb.plot.multi.trees(feature_names = colnames(diseaseInfo_matrix),
                     model = model)
Column 2 ['No'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for  backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Because we’re using a logistic model here, the plot is telling us the log-odds rather than the probability.

# Convert log-odds to probabilities.
#
# `odds` is a numeric vector of log-odds; the result is a numeric vector of
# the same length. stats::plogis() is the logistic function, i.e. the same
# value as exp(odds) / (1 + exp(odds)), but it does not return NaN when
# exp(odds) overflows to Inf for large inputs.
odds_to_probs <- function(odds){
    stats::plogis(odds)
}

# probability of leaf above countryPortugal
odds_to_probs(-0.599)
[1] 0.3545725
# Get information on how important each feature is for the baseline model.
# diseaseInfo_matrix is a matrix, so its feature names come from colnames();
# names() on a matrix returns NULL.
importance_matrix <- xgb.importance(feature_names = colnames(diseaseInfo_matrix),
                                    model = model)

# and plot it!
xgb.plot.importance(importance_matrix)

Unsupervised Learning

# NOTE(review): the PCA below is commented out, so this chunk currently only
# prints diseaseInfo_numeric.
# diseaseInfo_numeric.pca <- prcomp(diseaseInfo_numeric[, c(1:7,10,11)],
#                                   center = TRUE,
#                                   scale = TRUE)
diseaseInfo_numeric
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFBhY2thZ2VzIGFuZCBJbnB1dHMNCg0KYGBge3IgbGlicmFyaWVzIGluIHVzZX0NCmxpYnJhcnkoeGdib29zdCkgIyBmb3IgeGdib29zdA0KbGlicmFyeSh0aWR5dmVyc2UpICMgZ2VuZXJhbCB1dGlsaXR5IGZ1bmN0aW9ucw0KYGBgDQoNCmBgYHtyIHJlYWQgaW4gb3VyIGRhdGEgJiBwdXQgaXQgaW4gYSBkYXRhIGZyYW1lfQ0KZGlzZWFzZUluZm8gPC0gcmVhZF9jc3YoIkM6L1VzZXJzL1VzZXIvRGVza3RvcC91bml2ZXJzaXR5L21hY2hpbmUgbGVhcm5pbmcvbWFjaGluZSBsZWFybmluZyAtIHNhbGluaS9kYXRhc2V0L091dGJyZWFrXzI0MDgxNy5jc3YiKQ0KYGBgDQoNCiMgUHJlcGFyaW5nIG91ciBkYXRhICYgc2VsZWN0aW5nIGZlYXR1cmVzDQoNClRoZSBjb3JlIHhnYm9vc3QgZnVuY3Rpb24gcmVxdWlyZXMgZGF0YSB0byBiZSBhIG1hdHJpeC4NCkEgbWF0cml4IGlzIGxpa2UgYSBkYXRhZnJhbWUgdGhhdCBvbmx5IGhhcyBudW1iZXJzIGluIGl0LiBBIHNwYXJzZSBtYXRyaXggaXMgYSBtYXRyaXggdGhhdCBoYXMgYSBsb3QgemVyb3MgaW4gaXQuIFhHQm9vc3QgaGFzIGEgYnVpbHQtaW4gZGF0YXR5cGUsIERNYXRyaXgsIHRoYXQgaXMgcGFydGljdWxhcmx5IGdvb2QgYXQgc3RvcmluZyBhbmQgYWNjZXNzaW5nIHNwYXJzZSBtYXRyaWNlcyBlZmZpY2llbnRseS4NCg0KYGBge3IgcHJpbnQgdGhlIGZpcnN0IGZldyByb3dzIG9mIG91ciBkYXRhZnJhbWV9DQpoZWFkKGRpc2Vhc2VJbmZvKQ0KYGBgDQoNCm91ciBkYXRhIHdpbGwgbmVlZCBzb21lIGNsZWFuaW5nIGJlZm9yZSBpdCdzIHJlYWR5IHRvIGJlIHB1dCBpbiBhIG1hdHJpeC4gVG8gcHJlcGFyZSBvdXIgZGF0YSwgd2UgaGF2ZSBhIG51bWJlciBvZiBzdGVwcyB3ZSBuZWVkIHRvIGNvbXBsZXRlOg0KDQotIFJlbW92ZSBpbmZvcm1hdGlvbiBhYm91dCB0aGUgdGFyZ2V0IHZhcmlhYmxlIGZyb20gdGhlIHRyYWluaW5nIGRhdGENCi0gUmVkdWNlIHRoZSBhbW91bnQgb2YgcmVkdW5kYW50IGluZm9ybWF0aW9uDQotIENvbnZlcnQgY2F0ZWdvcmljYWwgaW5mb3JtYXRpb24gKGxpa2UgY291bnRyeSkgdG8gYSBudW1lcmljIGZvcm1hdA0KLSBTcGxpdCBkYXRhc2V0IGludG8gdGVzdGluZyBhbmQgdHJhaW5pbmcgc3Vic2V0cw0KLSBDb252ZXJ0IHRoZSBjbGVhbmVkIGRhdGFmcmFtZSB0byBhIERtYXRyaXgNCg0KIyMgUmVtb3ZlIGluZm9ybWF0aW9uIGFib3V0IHRoZSB0YXJnZXQgdmFyaWFibGUgZnJvbSB0aGUgdHJhaW5pbmcgZGF0YQ0KDQpgYGB7ciByZW1vdmUgdGhlIGNvbHVtbnMgdGhhdCBoYXZlIGluZm9ybWF0aW9uIG9uIG91ciB0YXJnZXQgdmFyaWFibGV9DQpkaXNlYXNlSW5mb19odW1hbnNSZW1vdmVkIDwtIGRpc2Vhc2VJbmZvICU+JSBzZWxlY3QoLXN0YXJ0c193aXRoKCJodW1hbiIpKSAjIGdldCB0aGUgc3Vic2V0IG9mIHRoZSBkYXRhZnJhbWUgdGhhdCBkb2Vzbid0IGhh
dmUgbGFiZWxzIGFib3V0IGh1bWFucyBhZmZlY3RlZCBieSB0aGUgZGlzZWFzZQ0KYGBgDQoNCkxldCdzIGNyZWF0ZSBhIG5ldyB2ZWN0b3Igd2l0aCB0aGUgbGFiZWxzDQoNCmBgYHtyIGdldCBhIGJvb2xlYW4gdmVjdG9yIG9mIHRyYWluaW5nIGxhYmVsc30NCmRpc2Vhc2VMYWJlbHMgPC0gZGlzZWFzZUluZm8gJT4lIA0KICBzZWxlY3QoaHVtYW5zQWZmZWN0ZWQpICU+JSAjIGdldCB0aGUgY29sdW1uIHdpdGggdGhlICMgb2YgaHVtYW5zIGFmZmVjdGVkDQogIGlzLm5hKCkgJT4lICMgaXMgaXQgTkE/DQogIG1hZ3JpdHRyOjpub3QoKSAjIHN3aXRjaCBUUlVFIGFuZCBGQUxTRSAodXNpbmcgZnVuY3Rpb24gZnJvbSB0aGUgbWFncml0dHIgcGFja2FnZSkNCg0KIyBjaGVjayBvdXQgdGhlIGZpcnN0IGZldyBsaW5lcw0KaGVhZChkaXNlYXNlTGFiZWxzKSAjIG9mIG91ciB0YXJnZXQgdmFyaWFibGUNCmhlYWQoZGlzZWFzZUluZm8kaHVtYW5zQWZmZWN0ZWQpICMgb2YgdGhlIG9yaWdpbmFsIGNvbHVtbg0KYGBgDQoNCiMjIFJlZHVjZSB0aGUgYW1vdW50IG9mIHJlZHVuZGFudCBpbmZvcm1hdGlvbg0KDQpgYGB7ciBzZWxlY3QganVzdCB0aGUgbnVtZXJpYyBjb2x1bW5zfQ0KZGlzZWFzZUluZm9fbnVtZXJpYyA8LSBkaXNlYXNlSW5mb19odW1hbnNSZW1vdmVkICU+JQ0KICAgIHNlbGVjdCgtSWQpICU+JSAjIHRoZSBjYXNlIGlkIHNob3VsZG4ndCBjb250YWluIHVzZWZ1bCBpbmZvcm1hdGlvbg0KICAgIHNlbGVjdCgtYyhsb25naXR1ZGUsIGxhdGl0dWRlKSkgJT4lICMgbG9jYXRpb24gZGF0YSBpcyBhbHNvIGluIGNvdW50cnkgZGF0YQ0KICAgIHNlbGVjdF9pZihpcy5udW1lcmljKSAjIHNlbGVjdCByZW1haW5pbmcgbnVtZXJpYyBjb2x1bW5zDQoNCiMgbWFrZSBzdXJlIHRoYXQgb3VyIGRhdGFmcmFtZSBpcyBhbGwgbnVtZXJpYw0Kc3RyKGRpc2Vhc2VJbmZvX251bWVyaWMpDQpgYGANCg0KIyMgQ29udmVydCBjYXRlZ29yaWNhbCBpbmZvcm1hdGlvbiAobGlrZSBjb3VudHJ5KSB0byBhIG51bWVyaWMgZm9ybWF0DQoNCmBgYHtyIGNoZWNrIG91dCB0aGUgZmlyc3QgZmV3IHJvd3Mgb2YgdGhlIGNvdW50cnkgY29sdW1ufQ0KaGVhZChkaXNlYXNlSW5mbyRjb3VudHJ5KQ0KYGBgDQoNCmBgYHtyIGNvbnZlcnQgdGhlc2UgY2F0ZWdvcmllcyB0byBhIG1hdHJpeH0NCm1vZGVsLm1hdHJpeCh+Y291bnRyeS0xLGhlYWQoZGlzZWFzZUluZm8pKSAjIG9uZS1ob3QgbWF0cml4IGZvciBqdXN0IHRoZSBmaXJzdCBmZXcgcm93cyBvZiB0aGUgImNvdW50cnkiIGNvbHVtbg0KYGBgDQoNCmBgYHtyIGNvbnZlcnQgY2F0ZWdvcmljYWwgZmFjdG9yIGludG8gb25lLWhvdCBlbmNvZGluZ30NCnJlZ2lvbiA8LSBtb2RlbC5tYXRyaXgofmNvdW50cnktMSxkaXNlYXNlSW5mbykNCg0KIyBjaGVjayBvdXQgdGhlIGZpcnN0IGZldyBsaW5lcyBvZiB0aGUgc3BlY2llcw0KaGVhZChkaXNlYXNlSW5mbyRzcGVjaWVzRGVzY3JpcHRpb24pDQpgYGANCg0KYGBge3IgYWRk
IGEgYm9vbGVhbiBjb2x1bW4gdG8gb3VyIG51bWVyaWMgZGF0YWZyYW1lIGluZGljYXRpbmcgd2hldGhlciBhIHNwZWNpZXMgaXMgZG9tZXN0aWN9DQoNCmRpc2Vhc2VJbmZvX251bWVyaWMkaXNfZG9tZXN0aWMgPC0gc3RyX2RldGVjdChkaXNlYXNlSW5mbyRzcGVjaWVzRGVzY3JpcHRpb24sICJkb21lc3RpYyIpDQpgYGANCg0KYGBge3IgY3JlYXRlIGEgb25lLWhvdCBtYXRyaXggb2YgZGlmZmVyZW50IHNwZWNpZXN9DQojIGdyYWIgdGhlIGxhc3Qgd29yZCBvZiBlYWNoIHJvdyBhbmQgdXNlIHRoYXQgdG8gY3JlYXRlIGEgb25lLWhvdCBtYXRyaXggb2YgZGlmZmVyZW50IHNwZWNpZXMNCg0KIyBnZXQgYSBsaXN0IG9mIGFsbCB0aGUgc3BlY2llcyBieSBnZXR0aW5nIHRoZSBsYXN0DQpzcGVjaWVzTGlzdCA8LSBkaXNlYXNlSW5mbyRzcGVjaWVzRGVzY3JpcHRpb24gJT4lDQogICAgc3RyX3JlcGxhY2UoIltbOnB1bmN0Ol1dIiwgIiIpICU+JSAjIHJlbW92ZSBwdW5jdHVhdGlvbiAoc29tZSByb3dzIGhhdmUgcGFyZW50aGVzZXMpDQogICAgc3RyX2V4dHJhY3QoIlthLXpdKiQiKSAjIGV4dHJhY3QgdGhlIGxlYXN0IHdvcmQgaW4gZWFjaCByb3cNCg0KIyBjb252ZXJ0IG91ciBsaXN0IGludG8gYSBkYXRhZnJhbWUuLi4NCnNwZWNpZXNMaXN0IDwtIHRpYmJsZShzcGVjaWVzID0gc3BlY2llc0xpc3QpDQoNCiMgYW5kIGNvbnZlcnQgdG8gYSBtYXRyaXggdXNpbmcgMSBob3QgZW5jb2RpbmcNCm9wdGlvbnMobmEuYWN0aW9uPSduYS5wYXNzJykgIyBkb24ndCBkcm9wIE5BIHZhbHVlcyENCnNwZWNpZXMgPC0gbW9kZWwubWF0cml4KH5zcGVjaWVzLTEsc3BlY2llc0xpc3QpDQoNCiMgYWRkIG91ciBvbmUtaG90IGVuY29kZWQgdmFyaWFibGUgYW5kIGNvbnZlcnQgdGhlIGRhdGFmcmFtZSBpbnRvIGEgbWF0cml4DQpkaXNlYXNlSW5mb19udW1lcmljIDwtIGNiaW5kKGRpc2Vhc2VJbmZvX251bWVyaWMsIHJlZ2lvbiwgc3BlY2llcykNCmRpc2Vhc2VJbmZvX21hdHJpeCA8LSBkYXRhLm1hdHJpeChkaXNlYXNlSW5mb19udW1lcmljKQ0KYGBgDQoNCiMjIFNwbGl0IHRoZSBkYXRhc2V0IHRvIG1vZGVsDQoNCmBgYHtyIFNwbGl0IGRhdGFzZXQgaW50byB0ZXN0aW5nIGFuZCB0cmFpbmluZyBzdWJzZXRzfQ0KIyBnZXQgdGhlIG51bWIgNzAvMzAgdHJhaW5pbmcgdGVzdCBzcGxpdA0KbnVtYmVyT2ZUcmFpbmluZ1NhbXBsZXMgPC0gcm91bmQobGVuZ3RoKGRpc2Vhc2VMYWJlbHMpICogLjcpDQoNCiMgdHJhaW5pbmcgZGF0YQ0KdHJhaW5fZGF0YSA8LSBkaXNlYXNlSW5mb19tYXRyaXhbMTpudW1iZXJPZlRyYWluaW5nU2FtcGxlcyxdDQp0cmFpbl9sYWJlbHMgPC0gZGlzZWFzZUxhYmVsc1sxOm51bWJlck9mVHJhaW5pbmdTYW1wbGVzXQ0KDQojIHRlc3RpbmcgZGF0YQ0KdGVzdF9kYXRhIDwtIGRpc2Vhc2VJbmZvX21hdHJpeFstKDE6bnVtYmVyT2ZUcmFpbmluZ1NhbXBsZXMpLF0NCnRlc3RfbGFiZWxzIDwtIGRpc2Vhc2VMYWJlbHNbLSgxOm51bWJlck9m
VHJhaW5pbmdTYW1wbGVzKV0NCmBgYA0KDQpgYGB7ciBDb252ZXJ0IHRoZSBjbGVhbmVkIGRhdGFmcmFtZSB0byBhIGRtYXRyaXh9DQojIHB1dCBvdXIgdGVzdGluZyAmIHRyYWluaW5nIGRhdGEgaW50byB0d28gc2VwZXJhdGVzIERtYXRyaXhzIG9iamVjdHMNCmR0cmFpbiA8LSB4Z2IuRE1hdHJpeChkYXRhID0gdHJhaW5fZGF0YSwgbGFiZWw9IHRyYWluX2xhYmVscykNCmR0ZXN0IDwtIHhnYi5ETWF0cml4KGRhdGEgPSB0ZXN0X2RhdGEsIGxhYmVsPSB0ZXN0X2xhYmVscykNCmBgYA0KDQojIEFuYWx5c2lzDQoNCiMjIFN1cGVydmlzZWQgTGVhcm5pbmcNCg0KYGBge3Igc2V0IGEgcmFuZG9tIHNlZWQgJiBzaHVmZmxlIGRhdGEgZnJhbWV9DQpzZXQuc2VlZCgxMjM0KQ0KZGlzZWFzZUluZm8gPC0gZGlzZWFzZUluZm9bc2FtcGxlKDE6bnJvdyhkaXNlYXNlSW5mbykpLCBdDQpgYGANCg0KYGBge3IgdHJhaW4gYSBtb2RlbCB1c2luZyBvdXIgdHJhaW5pbmcgZGF0YX0NCm1vZGVsIDwtIHhnYm9vc3QoZGF0YSA9IGR0cmFpbiwgIA0KICAgICAgICAgICAgICAgICBucm91bmQgPSAyLCAjIG1heCBudW1iZXIgb2YgYm9vc3RpbmcgaXRlcmF0aW9ucw0KICAgICAgICAgICAgICAgICBvYmplY3RpdmUgPSAiYmluYXJ5OmxvZ2lzdGljIikgICMgb2JqZWN0aXZlIGZ1bmN0aW9uDQpgYGANCg0KYGBge3IgbWFrZSBwcmVkaWN0aW9ufQ0KIyBnZW5lcmF0ZSBwcmVkaWN0aW9ucyBmb3Igb3VyIGhlbGQtb3V0IHRlc3RpbmcgZGF0YQ0KcHJlZCA8LSBwcmVkaWN0KG1vZGVsLCBkdGVzdCkNCg0KIyBnZXQgJiBwcmludCB0aGUgY2xhc3NpZmljYXRpb24gZXJyb3INCmVyciA8LSBtZWFuKGFzLm51bWVyaWMocHJlZCA+IDAuNSkgIT0gdGVzdF9sYWJlbHMpDQpwcmludChwYXN0ZSgidGVzdC1lcnJvcj0iLCBlcnIpKQ0KYGBgDQoNCiMjIFR1bmluZyAgb3VyIE1vZGVsDQoNCmBgYHtyfQ0KIyB0cmFpbiBhbiB4Z2Jvb3N0IG1vZGVsDQptb2RlbF90dW5lZCA8LSB4Z2Jvb3N0KGRhdGEgPSBkdHJhaW4sICAgICAgICAgIA0KICAgICAgICAgICAgICAgICBtYXguZGVwdGggPSAzLCAjIG1heGltdW0gZGVwdGggb2YgZWFjaCBkZWNpc2lvbiB0cmVlDQogICAgICAgICAgICAgICAgIG5yb3VuZCA9IDIsICMgbWF4IG51bWJlciBvZiBib29zdGluZyBpdGVyYXRpb25zDQogICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9ICJiaW5hcnk6bG9naXN0aWMiKSAjIG9iamVjdGl2ZSBmdW5jdGlvbiANCg0KIyBnZW5lcmF0ZSBwcmVkaWN0aW9ucyBmb3Igb3VyIGhlbGQtb3V0IHRlc3RpbmcgZGF0YQ0KcHJlZCA8LSBwcmVkaWN0KG1vZGVsX3R1bmVkLCBkdGVzdCkNCg0KIyBnZXQgJiBwcmludCB0aGUgY2xhc3NpZmljYXRpb24gZXJyb3INCmVyciA8LSBtZWFuKGFzLm51bWVyaWMocHJlZCA+IDAuNSkgIT0gdGVzdF9sYWJlbHMpDQpwcmludChwYXN0ZSgidGVzdC1lcnJvcj0iLCBlcnIpKQ0KYGBgDQoNClRoZXJlIGFyZSB0d28gdGhpbmdzIHdlIGNhbiB0cnkgdG8gc2VlIGlm
IHdlIGltcHJvdmUgb3VyIG1vZGVsIHBlcmZvcm1hbmNlOg0KLSBBY2NvdW50IGZvciB0aGUgZmFjdCB0aGF0IHdlIGhhdmUgaW1iYWxhbmNlZCBjbGFzc2VzDQotIFRyYWluIGZvciBtb3JlIHJvdW5kcw0KDQpgYGB7ciByZS10cmFpbmluZyBvdXIgbW9kZWx9DQojIGdldCB0aGUgbnVtYmVyIG9mIG5lZ2F0aXZlICYgcG9zaXRpdmUgY2FzZXMgaW4gb3VyIGRhdGENCm5lZ2F0aXZlX2Nhc2VzIDwtIHN1bSh0cmFpbl9sYWJlbHMgPT0gRkFMU0UpDQpwb3N0aXZlX2Nhc2VzIDwtIHN1bSh0cmFpbl9sYWJlbHMgPT0gVFJVRSkNCg0KIyB0cmFpbiBhIG1vZGVsIHVzaW5nIG91ciB0cmFpbmluZyBkYXRhDQptb2RlbF90dW5lZCA8LSB4Z2Jvb3N0KGRhdGEgPSBkdHJhaW4sICAgICAgICAgICANCiAgICAgICAgICAgICAgICAgbWF4LmRlcHRoID0gMywgIyBtYXhpbXVtIGRlcHRoIG9mIGVhY2ggZGVjaXNpb24gdHJlZQ0KICAgICAgICAgICAgICAgICBucm91bmQgPSAxMCwgIyBudW1iZXIgb2YgYm9vc3Rpbmcgcm91bmRzDQogICAgICAgICAgICAgICAgIGVhcmx5X3N0b3BwaW5nX3JvdW5kcyA9IDMsICMgaWYgd2UgZG9uJ3Qgc2VlIGFuIGltcHJvdmVtZW50IGluIHRoaXMgbWFueSByb3VuZHMsIHN0b3ANCiAgICAgICAgICAgICAgICAgb2JqZWN0aXZlID0gImJpbmFyeTpsb2dpc3RpYyIsICMgb2JqZWN0aXZlIGZ1bmN0aW9uDQogICAgICAgICAgICAgICAgIHNjYWxlX3Bvc193ZWlnaHQgPSBuZWdhdGl2ZV9jYXNlcy9wb3N0aXZlX2Nhc2VzKSAjIGNvbnRyb2wgZm9yIGltYmFsYW5jZWQgY2xhc3Nlcw0KDQojIGdlbmVyYXRlIHByZWRpY3Rpb25zIGZvciBvdXIgaGVsZC1vdXQgdGVzdGluZyBkYXRhDQpwcmVkIDwtIHByZWRpY3QobW9kZWxfdHVuZWQsIGR0ZXN0KQ0KDQojIGdldCAmIHByaW50IHRoZSBjbGFzc2lmaWNhdGlvbiBlcnJvcg0KZXJyIDwtIG1lYW4oYXMubnVtZXJpYyhwcmVkID4gMC41KSAhPSB0ZXN0X2xhYmVscykNCnByaW50KHBhc3RlKCJ0ZXN0LWVycm9yPSIsIGVycikpDQpgYGANCg0KLi4uIFRPRE8NCg0KYGBge3IgfQ0KIyB0cmFpbiBhIG1vZGVsIHVzaW5nIG91ciB0cmFpbmluZyBkYXRhDQptb2RlbF90dW5lZCA8LSB4Z2Jvb3N0KGRhdGEgPSBkdHJhaW4sICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgIG1heC5kZXB0aCA9IDMsICMgbWF4aW11bSBkZXB0aCBvZiBlYWNoIGRlY2lzaW9uIHRyZWUNCiAgICAgICAgICAgICAgICAgbnJvdW5kID0gMTAsICMgbnVtYmVyIG9mIGJvb3N0aW5nIHJvdW5kcw0KICAgICAgICAgICAgICAgICBlYXJseV9zdG9wcGluZ19yb3VuZHMgPSAzLCAjIGlmIHdlIGRvbid0IHNlZSBhbiBpbXByb3ZlbWVudCBpbiB0aGlzIG1hbnkgcm91bmRzLCBzdG9wDQogICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9ICJiaW5hcnk6bG9naXN0aWMiLCAjIG9iamVjdGl2ZSBmdW5jdGlvbg0KICAgICAgICAgICAgICAgICBzY2FsZV9wb3Nfd2VpZ2h0ID0gbmVnYXRpdmVfY2FzZXMvcG9zdGl2ZV9j
YXNlcywgIyBjb250cm9sIGZvciBpbWJhbGFuY2VkIGNsYXNzZXMNCiAgICAgICAgICAgICAgICAgZ2FtbWEgPSAxKSAjIGFkZCBhIHJlZ3VsYXJpemF0aW9uIHRlcm0NCg0KIyBnZW5lcmF0ZSBwcmVkaWN0aW9ucyBmb3Igb3VyIGhlbGQtb3V0IHRlc3RpbmcgZGF0YQ0KcHJlZCA8LSBwcmVkaWN0KG1vZGVsX3R1bmVkLCBkdGVzdCkNCg0KIyBnZXQgJiBwcmludCB0aGUgY2xhc3NpZmljYXRpb24gZXJyb3INCmVyciA8LSBtZWFuKGFzLm51bWVyaWMocHJlZCA+IDAuNSkgIT0gdGVzdF9sYWJlbHMpDQpwcmludChwYXN0ZSgidGVzdC1lcnJvcj0iLCBlcnIpKQ0KYGBgDQoNCiMjIEludGVycHJldGF0aW9uDQoNCmBgYHtyIH0NCiMgcGxvdCB0aGVtIGZlYXR1cmVzISB3aGF0J3MgY29udHJpYnV0aW5nIG1vc3QgdG8gb3VyIG1vZGVsPw0KeGdiLnBsb3QubXVsdGkudHJlZXMoZmVhdHVyZV9uYW1lcyA9IG5hbWVzKGRpc2Vhc2VJbmZvX21hdHJpeCksIA0KICAgICAgICAgICAgICAgICAgICAgbW9kZWwgPSBtb2RlbCkNCmBgYA0KDQpCZWNhdXNlIHdlJ3JlIHVzaW5nIGEgbG9naXN0aWMgbW9kZWwgaGVyZSwgaXQncyB0ZWxsaW5nIHVzIHRoZSBsb2ctb2RkcyByYXRoZXIgdGhhbiB0aGUgcHJvYmFiaWxpdHkNCg0KYGBge3J9DQojIGNvbnZlcnQgbG9nIG9kZHMgdG8gcHJvYmFiaWxpdHkNCm9kZHNfdG9fcHJvYnMgPC0gZnVuY3Rpb24ob2Rkcyl7DQogICAgcmV0dXJuKGV4cChvZGRzKSAvICgxICsgZXhwKG9kZHMpKSkNCn0NCg0KIyBwcm9iYWJpbGl0eSBvZiBsZWFmIGFib3ZlIGNvdW50cnlQb3J0dWd1bA0Kb2Rkc190b19wcm9icygtMC41OTkpDQpgYGANCg0KYGBge3IgcGxvdHRpbmcgdGhlIGltcG9ydGFuY2UgbWF0cml4fQ0KIyBnZXQgaW5mb3JtYXRpb24gb24gaG93IGltcG9ydGFudCBlYWNoIGZlYXR1cmUgaXMNCmltcG9ydGFuY2VfbWF0cml4IDwtIHhnYi5pbXBvcnRhbmNlKG5hbWVzKGRpc2Vhc2VJbmZvX21hdHJpeCksIG1vZGVsID0gbW9kZWwpDQoNCiMgYW5kIHBsb3QgaXQhDQp4Z2IucGxvdC5pbXBvcnRhbmNlKGltcG9ydGFuY2VfbWF0cml4KQ0KDQpgYGANCg0KIyMgVW5zdXBlcnZpc2VkIExlYXJuaW5nDQoNCmBgYHtyfQ0KIyBkaXNlYXNlSW5mb19udW1lcmljLnBjYSA8LSBwcmNvbXAoZGlzZWFzZUluZm9fbnVtZXJpY1ssIGMoMTo3LDEwLDExKV0sDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjZW50ZXIgPSBUUlVFLA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2NhbGUgPSBUUlVFKQ0KZGlzZWFzZUluZm9fbnVtZXJpYw0KYGBgDQoNCg==